# createWeights.R

# Part of the replication archive for 
#
#   Bullock, John G., and Kelly Rader. 2021. "Response Options and the 
#   Measurement of Political Knowledge." Forthcoming in the British Journal 
#   of Political Science.


library(here)    # for here::here()
library(ipumsr)  # for read_ipums_ddi(), read_ipums_micro()
library(survey)  # for rake(), svytable(), svydesign()
# Alias car's Recode so it can be called unqualified below without attaching
# the whole car package.
Recode <- car::Recode

# Sample-restriction switches. They are not referenced anywhere else in this
# script; presumably R/SSI_2017_coding.R (sourced next) reads them, which is
# why they are set before the source() call -- confirm in that file.
ELIMINATE_BREAKOFFS <- TRUE
ELIMINATE_CHEATERS  <- TRUE

# Load and code the experiment data. The objects used below (originalData,
# age, raceCoalesced, educ, state, female) are not defined in this file, so
# they are presumably created by this script -- confirm in that file.
source(here::here("R/SSI_2017_coding.R"))

# Destination of the psid/weight CSV written at the end of this script.
filenameOutput <- here::here("data/weights.csv")



# **************************************************************************
# CREATE VARS FOR WEIGHTING FROM OUR EXPERIMENT ####
# **************************************************************************
# Collapse respondent characteristics into the coarse categories that serve
# as the raking margins further down. None of the inputs (age, raceCoalesced,
# educ, state, female, originalData) is defined in this file; presumably they
# come from the sourced coding script -- confirm there.

# Age: three bands.
agecat <- Recode(
  age,
  "17:29='under 30'; 30:60='30 to 60'; 61:90='over 60'")

# Race: fold 'Asian' and 'multiracial' into 'Other'; other values unchanged.
racecat <- Recode(
  raceCoalesced,
  "c('Asian','multiracial')='Other'")

# Education: three levels; NA is kept as NA rather than falling into 'else'.
educat <- Recode(
  educ,
  "c('someCollege',\"Associate's\")='college no BA'; c(\"Bachelor's\",\"Master's\",'otherPostCollege')='BA or more'; NA=NA; else='HS or less'")
educat <- as.character(educat)

# Region: look each respondent's state up in a state-name -> region table
# built from R's built-in state.region / state.name vectors.
# NOTE(review): unlike the CPS section below, nothing here maps the District
# of Columbia to a region. If `state` can hold DC, those rows get NA and are
# removed by na.omit() -- confirm that this is intended.
regcat <- setNames(state.region, state.name)[as.character(state)]
regcat <- as.character(regcat)

# Sex indicator as a number.
femcat <- as.numeric(female)

# Assemble the weighting frame and keep complete cases only. The first column
# is left unnamed on purpose: it ends up called `originalData$psid`, which the
# export step at the end of the script relies on.
# Row counts recorded by the authors: 2080 before na.omit(), 1961 after.
our_data <- na.omit(
  cbind.data.frame(
    originalData$psid,
    agecat,
    racecat,
    educat,
    regcat,
    femcat,
    stringsAsFactors = FALSE))



# **************************************************************************
# IMPORT CPS DATA ####
# **************************************************************************
# Read the IPUMS CPS extract: first the DDI codebook, then the microdata it
# describes.
CPS_2017_DDI      <- read_ipums_ddi(here::here('data/CPS_2017/cps_00002.xml'))
CPS_2017_original <- read_ipums_micro(CPS_2017_DDI)

# Keep only the identifier, the final weight, and the demographic variables.
CPS_2017 <- select(
  CPS_2017_original,
  CPSIDP, WTFINL, AGE, SEX, RACE, HISPAN, REGION, STATECENSUS, EDUC)

# Convert the columns to factors via as_factor().
CPS_2017 <- as_factor(CPS_2017)

# Derived columns. STATECENSUS is turned into character *before* the region
# lookup so that the lookup table is indexed by state name.
# HISPANIC_OR_LATINO is created here but is not referenced again in this
# script.
CPS_2017 <- mutate(
  CPS_2017,
  AGE                = ordered(AGE),
  EDUC               = ordered(EDUC),
  HISPANIC_OR_LATINO = HISPAN %in% c("Mexican", "Dominican", "Puerto Rican", "Cuban", "Central American, (excluding Salvadoran)", "Salvadoran", "South American"),
  STATECENSUS        = as.character(STATECENSUS),
  REGION4            = setNames(state.region, state.name)[STATECENSUS])

# Rows for the District of Columbia are explicitly assigned to the South.
CPS_2017$REGION4 <- replace(
  CPS_2017$REGION4,
  CPS_2017$STATECENSUS == "District of Columbia",
  "South")



# **************************************************************************
# CODE CPS VARIABLES TO USE FOR WEIGHTING ####
# **************************************************************************
# Build the same five weighting categories for the CPS that were built for
# the experiment data, then keep only those columns plus the CPS final weight.
CPS_for_wt <- subset(
  within(CPS_2017, {
    # Sex as a number: as.numeric(SEX) minus 1.
    # NOTE(review): presumably SEX is a factor after as_factor(), so this uses
    # its level codes and assumes the first level is male -- TODO confirm.
    femcat <- as.numeric(SEX) - 1

    # Four-category region as plain character.
    regcat <- as.character(REGION4)

    # Education: three levels; everything not listed becomes 'HS or less'.
    educat <- Recode(EDUC, "c('Some college but no degree',\"Associate's degree, occupational/vocational program\",\"Associate's degree, academic program\")='college no BA';
                c(\"Bachelor's degree\",\"Master's degree\",'Professional school degree','Doctorate degree')='BA or more';
                else='HS or less'")
    educat <- as.character(educat)

    # Race: White / African American / Other, then relabel as 'Hispanic' every
    # row whose HISPAN is not "Not Hispanic", except rows already coded
    # 'African American'.
    racecat <- as.character(
      Recode(RACE, "'White'='White'; 'Black/Negro'='African American'; else='Other'"))
    racecat <- replace(
      racecat,
      HISPAN != "Not Hispanic" & racecat != "African American",
      "Hispanic")

    # Age: three bands.
    # NOTE(review): AGE was made an ordered factor in the import section, so
    # Recode receives a factor rather than a number here. Confirm that the
    # numeric ranges match the factor's values as intended and that no ages
    # fall outside 17-85 in this extract.
    agecat <- as.character(
      Recode(AGE, "17:29='under 30'; 30:60='30 to 60'; 61:85='over 60'"))
  }),
  select = c(agecat, racecat, educat, regcat, femcat, WTFINL))



# **************************************************************************
# CREATE WEIGHTS FOR OUR SSI SAMPLE ####
# **************************************************************************

# Survey design for the CPS, weighted by the CPS final weight (WTFINL).
CPS_des_wt <- svydesign(
  ids     = ~1,
  weights = CPS_for_wt$WTFINL,
  data    = CPS_for_wt)

# Weighted CPS frequency tables, one per margin. The order must match the
# order of sample.margins in the rake() call below.
CPS_freq_wt <- list(
  svytable(~agecat,  design = CPS_des_wt),
  svytable(~racecat, design = CPS_des_wt),
  svytable(~educat,  design = CPS_des_wt),
  svytable(~regcat,  design = CPS_des_wt),
  svytable(~femcat,  design = CPS_des_wt))

# Unweighted design for our own sample. svydesign() warns here; per the
# authors the warning can be ignored.
our_data_des <- svydesign(ids = ~1, data = our_data)

# Rake our sample to the CPS margins.
our_rake <- rake(
  our_data_des,
  list(~agecat, ~racecat, ~educat, ~regcat, ~femcat),
  CPS_freq_wt)

# Extract the raked weights and rescale them to have mean 1.
wgt <- weights(our_rake)
wgt <- wgt / mean(wgt)

# Diagnostics. Authors' note: no trimming needed, all weights fall within
# 0.3 - 3.
summary(wgt)
# For comparison, the mean-normalized CPS weights; authors' note: our range
# is smaller.
summary(with(CPS_for_wt, WTFINL / mean(WTFINL)))

# Design for our sample using the new weights.
our_data_des_wt <- svydesign(ids = ~1, data = our_data, weights = wgt)

# Spot check on education shares, in this order: weighted CPS, our weighted
# sample, our unweighted sample. Authors' note: looks good.
prop.table(svytable(~educat, design = CPS_des_wt))
prop.table(svytable(~educat, design = our_data_des_wt))
prop.table(svytable(~educat, design = our_data_des))

# Build the two-column export frame: respondent id and normalized weight.
# The id column of our_data is literally named `originalData$psid` (it was
# passed unnamed when our_data was assembled), hence the quoted lookup.
# [2020 07 18]
final_wgt <- data.frame(
  psid   = our_data[["originalData$psid"]],
  weight = wgt)

# Sanity check: every exported psid must also exist in originalData.
stopifnot(all(final_wgt$psid %in% originalData$psid))

# Write the weights (not the original data) to data/weights.csv, without row
# names.  [2020 07 18]
write.csv(final_wgt, file = filenameOutput, row.names = FALSE)



